# -- coding: utf-8 --
'''
# @Time : 2023/06/25 21:53
# @Author : Shiyu He
# @University : Xinjiang University
'''

import os
import shutil
import subprocess
import wave
from pydub import AudioSegment

# Batch-convert every ".mp3" file in `input_folder` to a 16 kHz, mono, 16-bit
# PCM ".wav" file in `output_folder`.
#
# NOTE (from the original author): this script is only meant to be used on
# Windows inside the "funasr" environment, because that is the only
# environment with a full ffmpeg build installed.

# Input and output folder paths.
input_folder = r"F:\duwx\class3_10w_mp3"
output_folder = r"F:\duwx\class3_10w_wav"
# 1-based ordinal of the next successful conversion, used in progress output.
count = 1

# Create the destination up front so the export below cannot fail merely
# because the folder is missing.
os.makedirs(output_folder, exist_ok=True)

# Walk every MP3 file in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".mp3"):
        continue

    # Build the source path and the matching ".wav" destination path.
    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder,
                               os.path.splitext(filename)[0] + ".wav")

    # Already converted on a previous run: skip it.
    if os.path.exists(output_path):
        print(f'wav文件{output_path}已存在')
        continue

    temp_path = output_path + ".temp.wav"
    try:
        # Decode the MP3 with pydub and export it as WAV in the target format.
        audio = AudioSegment.from_file(input_path, format="mp3")
        audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
        audio.export(output_path, format="wav")

        # Read the WAV header back to verify the format that was written.
        # The parameters are captured and the file is closed *before* any
        # fix-up, so the file is never replaced while still open.
        with wave.open(output_path, "rb") as wav_file:
            sample_rate = wav_file.getframerate()
            sample_width = wav_file.getsampwidth()
            channels = wav_file.getnchannels()

        if sample_rate != 16000 or sample_width != 2 or channels != 1:
            # Format mismatch: re-encode with ffmpeg into a temp file.
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact; "-y" avoids blocking on an
            # overwrite prompt if a stale temp file exists; check=True turns
            # an ffmpeg failure into an exception instead of ignoring it.
            command = [
                "ffmpeg", "-y",
                "-i", output_path,
                "-ar", "16000",
                "-ac", "1",
                "-sample_fmt", "s16",
                temp_path,
            ]
            subprocess.run(command, check=True)
            # os.replace overwrites an existing destination on every platform.
            os.replace(temp_path, output_path)
    except Exception as e:
        print(f"audio {filename} Exception:", e)
        # Remove partial results so a re-run retries this file instead of
        # skipping it because a broken/incorrect ".wav" already exists.
        # output_path did not exist before this iteration (checked above),
        # so only files created here are deleted.
        for leftover in (temp_path, output_path):
            try:
                if os.path.exists(leftover):
                    os.remove(leftover)
            except OSError as cleanup_error:
                print(f"audio {filename} Exception:", cleanup_error)
    else:
        print(f"第 {count} Converted {input_path} to {output_path}")
        # Count only successful conversions so the progress number is
        # meaningful (previously it also advanced for non-mp3 entries and
        # failures, but not for skipped files).
        count += 1
